Our group wanted to explore the correlation between alcohol consumption and happiness worldwide. Based
on data gathered from 112 countries, we were able to determine the five "happiest" countries and the
five least "happy" countries.
The happiness score is on a scale from 0 to 10, with 10 being the happiest.
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import os
import numpy as np
from scipy.stats import linregress
import gmaps
import plotly.express as px
# Locations of the three input CSV files, all under the Resources folder.
resources_dir = "Resources"
happiness_path = os.path.join(resources_dir, "HappinessAlcoholConsumption.csv")
income_path = os.path.join(resources_dir, "countries_income_group.csv")
country_path = os.path.join(
    resources_dir,
    "world_country_and_usa_states_latitude_and_longitude_values.csv",
)

# Load the raw tables; the income file's first column becomes its index.
happiness_study_df = pd.read_csv(happiness_path)
income_study_df = pd.read_csv(income_path, index_col=[0])

# Coordinates table: keep only latitude/longitude plus the country name,
# renamed to 'Country' so it shares a join key with the happiness table.
df_lat = pd.read_csv(country_path)[['latitude', 'longitude', 'country']]
df_lat = df_lat.rename(columns={'country': 'Country'})
df_lat = happiness_study_df.merge(df_lat, on='Country')

# The income table names its country column 'Economy'; align it with the
# happiness table before joining the two on 'Country'.
income_study_df = income_study_df.rename(columns={'Economy': 'Country'})
alcohol_study = happiness_study_df.merge(income_study_df, on="Country")

# Drop 'Code' and the income-side duplicate of the region column
# ('Region_y'); the happiness-side copy stays as 'Region_x'.
final_alcohol = alcohol_study.drop(['Code', 'Region_y'], axis=1)
final_alcohol
| Country | Region_x | Hemisphere | HappinessScore | HDI | GDP_PerCapita | Beer_PerCapita | Spirit_PerCapita | Wine_PerCapita | Income group | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Denmark | Western Europe | north | 7.526 | 928 | 53.579 | 224 | 81 | 278 | High income |
| 1 | Switzerland | Western Europe | north | 7.509 | 943 | 79.866 | 185 | 100 | 280 | High income |
| 2 | Iceland | Western Europe | north | 7.501 | 933 | 60.530 | 233 | 61 | 78 | High income |
| 3 | Norway | Western Europe | north | 7.498 | 951 | 70.890 | 169 | 71 | 129 | High income |
| 4 | Finland | Western Europe | north | 7.413 | 918 | 43.433 | 263 | 133 | 97 | High income |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 107 | Madagascar | Sub-Saharan Africa | south | 3.695 | 517 | 402.000 | 26 | 15 | 4 | Low income |
| 108 | Tanzania | Sub-Saharan Africa | south | 3.666 | 533 | 878.000 | 36 | 6 | 1 | Low income |
| 109 | Liberia | Sub-Saharan Africa | north | 3.622 | 432 | 455.000 | 19 | 152 | 2 | Low income |
| 110 | Benin | Sub-Saharan Africa | north | 3.484 | 512 | 789.000 | 34 | 4 | 13 | Low income |
| 111 | Togo | Sub-Saharan Africa | north | 3.303 | 500 | 577.000 | 36 | 2 | 19 | Low income |
112 rows × 10 columns
# Markdown #1 - What are the top 5 happiest countries?
# Select the five highest-scoring rows directly instead of filtering on a
# hand-picked cutoff (previously HappinessScore > 7.41), which only yields
# exactly five countries for one particular version of the data.
# nlargest returns the rows ordered from the highest score downwards.
top5 = final_alcohol.nlargest(5, 'HappinessScore')
top5
| Country | Region_x | Hemisphere | HappinessScore | HDI | GDP_PerCapita | Beer_PerCapita | Spirit_PerCapita | Wine_PerCapita | Income group | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Denmark | Western Europe | north | 7.526 | 928 | 53.579 | 224 | 81 | 278 | High income |
| 1 | Switzerland | Western Europe | north | 7.509 | 943 | 79.866 | 185 | 100 | 280 | High income |
| 2 | Iceland | Western Europe | north | 7.501 | 933 | 60.530 | 233 | 61 | 78 | High income |
| 3 | Norway | Western Europe | north | 7.498 | 951 | 70.890 | 169 | 71 | 129 | High income |
| 4 | Finland | Western Europe | north | 7.413 | 918 | 43.433 | 263 | 133 | 97 | High income |
# Bar chart of the five happiest countries: one bar per country, bar height
# is the happiness score, each bar in its own shade of blue.
countries = top5['Country']
scores = top5['HappinessScore']
blues = ['#1053e3', '#1072e3', '#1092e3', '#10abe3', '#10bce3']
plt.bar(countries, scores, color=blues)

# Titles, axis labels and tick styling.
axis_label_style = {'fontsize': 15, 'labelpad': 35}
plt.title('Top Five Happiest Countries', fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Countries', **axis_label_style)
plt.ylabel('Happiness Score', **axis_label_style)
plt.xticks(fontsize=15, fontweight='bold')

# The y-axis is zoomed to 7.35-7.55, so the bars show differences between
# countries rather than absolute score.
plt.ylim(7.35, 7.55)

# Enlarge the current figure to 20x10 inches before rendering it.
plt.gcf().set_size_inches(20, 10)
plt.show()
# Markdown #2 - What are the bottom 5 happiest countries?
# Select the five lowest-scoring rows directly instead of filtering on a
# hand-picked cutoff (previously HappinessScore <= 3.7), which only yields
# exactly five countries for one particular version of the data.
bottom5 = final_alcohol.nsmallest(5, 'HappinessScore')
# Order the bars from the lowest score upwards. nsmallest already returns
# the rows in this order; the explicit sort keeps that guarantee local.
sortbottom = bottom5.sort_values('HappinessScore', ascending=True)

# Variables for the graph: one bar per country, one colour per bar.
x = sortbottom['Country']
y = sortbottom['HappinessScore']
color = ['#E3504D', '#E3754D', '#E39B4D', '#E3B94D', '#E3D74D']

# Plot the graph.
plt.bar(x, y, color=color)

# Labelling and styling for the graph.
plt.xticks(fontsize=15, fontweight='bold')
plt.xlabel('Countries', fontsize=15, labelpad=35)
plt.ylabel('Happiness Score', fontsize=15, labelpad=35)
plt.title('Bottom Five Least Happiest Countries', fontsize=25, loc='center', fontweight='bold')
# The y-axis is zoomed to 3.0-3.8, so the bars show differences between
# countries rather than absolute score.
plt.ylim(3.0, 3.8)
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Markdown #3 - Which alcohol is the most popular?
# Mean per-capita consumption of each beverage over every row of the
# merged table.
beer, spirit, wine = (
    final_alcohol[column].mean()
    for column in ('Beer_PerCapita', 'Spirit_PerCapita', 'Wine_PerCapita')
)

# One-row summary table holding the three averages side by side.
best_alcohol = pd.DataFrame(
    {
        "Average Beer Per Capita": [beer],
        "Average Spirit Per Capita": [spirit],
        "Average Wine Per Capita": [wine],
    }
)
best_alcohol.head()
| Average Beer Per Capita | Average Spirit Per Capita | Average Wine Per Capita | |
|---|---|---|---|
| 0 | 141.258929 | 100.071429 | 70.276786 |
# Bar chart comparing the average per-capita consumption of each beverage.
alcohol = ["Beer", "Spirit", "Wine"]
# Compute the averages from the data instead of pasting them in by hand
# (previously [141.26, 100.07, 70.28], copied from the summary table), so
# the chart cannot drift out of sync with the dataset. Values are rounded to
# two decimals to match the figures that were plotted before.
avg_pc = [
    round(final_alcohol[f"{kind}_PerCapita"].mean(), 2) for kind in alcohol
]
x_axis = np.arange(len(avg_pc))

plt.bar(x_axis, avg_pc, color="g", alpha=.8, align='center')
# Replace the numeric bar positions with the beverage names.
plt.xticks(x_axis, alcohol, fontsize=15, fontweight='bold')
plt.title("Beer vs Wine vs Spirit per Capita", fontsize=25, loc='center', fontweight='bold')
plt.xlabel("Alcohol", fontsize=15, labelpad=35)
plt.ylabel("Average Alcohol Type Per Capita", fontsize=15, labelpad=35)
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Markdown #4 - For the top 5, what is their alcohol of choice?
# Take the five highest-scoring rows directly rather than relying on a
# hand-picked score cutoff (previously HappinessScore > 7.41).
top5 = final_alcohol.nlargest(5, 'HappinessScore')
x_axis = np.arange(len(top5))

# Grouped bars: three bars per country, laid out symmetrically around the
# tick position so each country label sits under the middle of its group
# (the previous offsets of -0.1 / +0.1 / +0.3 pushed every group to the
# right of its label).
bar_width = 0.2
plt.bar(x_axis - bar_width, top5['Beer_PerCapita'], width=bar_width, label='Beer')
plt.bar(x_axis, top5['Spirit_PerCapita'], width=bar_width, label='Spirits')
plt.bar(x_axis + bar_width, top5['Wine_PerCapita'], width=bar_width, label='Wine')

plt.title("Alcohol of Choice for Top Five", fontsize=25, loc='center', fontweight='bold')
# Replace the numeric group positions with the country names.
plt.xticks(x_axis, top5['Country'], fontsize=15, fontweight='bold')
plt.ylabel('Consumption Per Capita', fontsize=15, labelpad=35)
plt.legend(fontsize=15)
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Markdown #5 - For bottom 5, what is their alcohol of choice?
# Take the five lowest-scoring rows directly rather than relying on a
# hand-picked score cutoff (previously HappinessScore <= 3.7).
# sort_index() restores the table's own row order, which is the order the
# cutoff-based filter used to return, so the bars keep their positions.
bottom5 = final_alcohol.nsmallest(5, 'HappinessScore').sort_index()
x_axis = np.arange(len(bottom5))

# Grouped bars: three bars per country, laid out symmetrically around the
# tick position so each country label sits under the middle of its group
# (the previous offsets of -0.1 / +0.1 / +0.3 pushed every group to the
# right of its label).
bar_width = 0.2
plt.bar(x_axis - bar_width, bottom5['Beer_PerCapita'], width=bar_width, label='Beer')
plt.bar(x_axis, bottom5['Spirit_PerCapita'], width=bar_width, label='Spirits')
plt.bar(x_axis + bar_width, bottom5['Wine_PerCapita'], width=bar_width, label='Wine')

plt.title("Alcohol of Choice for Bottom Five", fontsize=25, loc='center', fontweight='bold')
# Replace the numeric group positions with the country names.
plt.xticks(x_axis, bottom5['Country'], fontsize=15, fontweight='bold')
plt.ylabel('Consumption Per Capita', fontsize=15, labelpad=35)
plt.legend(fontsize=15)
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
# Markdown #6 - Is there a relationship between income level and happiness? Are they highly correlated?
# Scatter of happiness score against income group: large green markers with
# a black outline, one marker per country.
income_group = final_alcohol['Income group']
happiness = final_alcohol['HappinessScore']
marker_style = {'s': 500, 'edgecolor': 'k', 'facecolors': '#32a852'}
plt.scatter(income_group, happiness, **marker_style)

# Title, axis labels and tick styling.
axis_label_style = {'fontsize': 15, 'labelpad': 35}
plt.title('Income vs. Happiness Score', fontsize=25, loc='center', fontweight='bold')
plt.xlabel('Income Level', **axis_label_style)
plt.ylabel('Happiness Score', **axis_label_style)
plt.xticks(fontsize=15, fontweight='bold')

# Fix the score axis to the 3-8 range.
plt.ylim(3, 8)

# Enlarge the current figure to 20x10 inches before rendering it.
plt.gcf().set_size_inches(20, 10)
plt.show()
# Calculate the correlation coefficient and linear regression model
# for GDP per Capita and HappinessScore.
# Columns are selected by name rather than by position (previously
# iloc[:, 5] and iloc[:, 3]) so that a change in column order cannot
# silently swap in a different variable.
# NOTE(review): the displayed table shows GDP_PerCapita as 53.579 for Denmark
# but 402.000 for Madagascar, which looks like mixed scales (possibly a
# thousands separator read as a decimal point) - confirm the units before
# interpreting the sign of this correlation.
GDP_per = final_alcohol['GDP_PerCapita']
Happiness_score = final_alcohol['HappinessScore']
correlation = st.pearsonr(GDP_per, Happiness_score)
print(f"The correlation between GDP per Capita and Happiness Score is {round(correlation[0],2)}.")

# Calculate linear regression: GDP per capita (y) fitted as a straight-line
# function of happiness score (x).
x_values = Happiness_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
regress_values = x_values * slope + intercept

# Scatter of the raw points with the fitted line and its equation on top.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, (5.5, 200.0), fontsize=20, color="red", fontweight='bold')

# Add labels and title to plot
plt.xlabel("Happiness Score", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Happiness Score', fontsize=25, loc='center', fontweight='bold')
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
The correlation between GDP per Capita and Happiness Score is -0.49.
# Markdown #6 - is there a relationship between GDP and each alcohol of choice?
# Columns are selected by name rather than by position (previously
# iloc[:, 5] and iloc[:, 6]) so that a change in column order cannot
# silently swap in a different variable.
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before trusting this result.
GDP_per = final_alcohol['GDP_PerCapita']
Beer_score = final_alcohol['Beer_PerCapita']
correlation = st.pearsonr(GDP_per, Beer_score)
print(f"The correlation between GDP per Capita and Beer Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: GDP per capita (y) fitted as a straight-line
# function of beer consumption per capita (x).
x_values = Beer_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
regress_values = x_values * slope + intercept

# Scatter of the raw points with the fitted line and its equation on top.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, (200.0, 200.0), fontsize=20, color="red", fontweight='bold')

# Add labels and title to plot
plt.xlabel("Beer Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Beer Per Capita', fontsize=25, loc='center', fontweight='bold')
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
The correlation between GDP per Capita and Beer Per Capita is -0.4.
# Correlation and linear regression for GDP per Capita and Wine Per Capita.
# Columns are selected by name rather than by position (previously
# iloc[:, 5] and iloc[:, 8]) so that a change in column order cannot
# silently swap in a different variable.
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before trusting this result.
GDP_per = final_alcohol['GDP_PerCapita']
Wine_score = final_alcohol['Wine_PerCapita']
correlation = st.pearsonr(GDP_per, Wine_score)
print(f"The correlation between GDP per Capita and Wine Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: GDP per capita (y) fitted as a straight-line
# function of wine consumption per capita (x).
x_values = Wine_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
regress_values = x_values * slope + intercept

# Scatter of the raw points with the fitted line and its equation on top.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, (200.0, 200.0), fontsize=20, color="red", fontweight='bold')

# Add labels and title to plot
plt.xlabel("Wine Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Wine Per Capita', fontsize=25, loc='center', fontweight='bold')
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
The correlation between GDP per Capita and Wine Per Capita is -0.21.
# Correlation and linear regression for GDP per Capita and Spirit Per Capita.
# Columns are selected by name rather than by position (previously
# iloc[:, 5] and iloc[:, 7]) so that a change in column order cannot
# silently swap in a different variable.
# NOTE(review): GDP_PerCapita appears to mix scales in the displayed table
# (e.g. 53.579 vs 402.000) - confirm the units before trusting this result.
GDP_per = final_alcohol['GDP_PerCapita']
Spirit_score = final_alcohol['Spirit_PerCapita']
correlation = st.pearsonr(GDP_per, Spirit_score)
print(f"The correlation between GDP per Capita and Spirit Per Capita is {round(correlation[0],2)}.")

# Calculate linear regression: GDP per capita (y) fitted as a straight-line
# function of spirit consumption per capita (x).
x_values = Spirit_score
y_values = GDP_per
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
regress_values = x_values * slope + intercept

# Scatter of the raw points with the fitted line and its equation on top.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, (200.0, 200.0), fontsize=20, color="red", fontweight='bold')

# Add labels and title to plot
plt.xlabel("Spirit Per Capita", fontsize=15, labelpad=35)
plt.ylabel("GDP per Capita", fontsize=15, labelpad=35)
plt.title('GDP per Capita vs Spirit Per Capita', fontsize=25, loc='center', fontweight='bold')
fig = plt.gcf()
fig.set_size_inches(20, 10)
plt.show()
The correlation between GDP per Capita and Spirit Per Capita is -0.28.
# Markdown #7 - Heat Map of Happiness Scores
# World choropleth coloured by happiness score. Countries are matched to the
# map by name, the colour scale is pinned to 0-8, and hovering over a country
# shows its name.
choropleth_options = {
    "locations": "Country",
    "color": "HappinessScore",
    "locationmode": 'country names',
    "hover_name": "Country",
    "range_color": [0, 8],
    "title": 'Country vs happiness score',
}
fig = px.choropleth(df_lat, **choropleth_options)
fig.show()